"""
author: xgt-python
datetime: 2021/11/06
Scrape 4K wallpapers.
"""

import os
import re
from urllib.parse import urljoin

import parsel
import requests
# Site root; relative links found in the listing/detail pages are resolved against it.
BASE_URL = 'http://www.netbian.com'
# Directory the downloaded wallpapers are written to.
SAVE_DIR = '壁纸'
# Seconds to wait on any single HTTP request before giving up, so a stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 10
# Browser-like User-Agent sent with every request (loop-invariant, so built once).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36 Edg/95.0.1020.44'
}

# Create the output directory up front; open() below would otherwise fail
# with FileNotFoundError on a fresh checkout.
os.makedirs(SAVE_DIR, exist_ok=True)

# NOTE(review): iteration starts at page 2; presumably the first listing page
# lives at a different URL -- confirm whether it should be fetched as well.
for page in range(2, 11):
    url = f'{BASE_URL}/1920x1080/index_{page}.htm'
    # 1. Fetch the listing page; skip the page on network/HTTP failure instead
    #    of aborting the whole run.
    try:
        response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f'skip page {page}: {exc}')
        continue
    response.encoding = response.apparent_encoding  # let requests detect the page encoding

    # 2. Parse the listing and pull out each wallpaper's detail-page link.
    selector = parsel.Selector(response.text)
    for li in selector.css('.list li'):
        title = li.css('b::text').get()
        link = li.css('a::attr(href)').get()
        # Entries without a <b> title or without a link are not wallpaper items.
        if not title or not link:
            continue
        # urljoin copes with both relative and absolute hrefs.
        href = urljoin(BASE_URL, link)

        # 3. Fetch the detail page, locate the full-size image and download it.
        try:
            detail = requests.get(url=href, headers=headers, timeout=REQUEST_TIMEOUT)
            detail.raise_for_status()
            img_src = parsel.Selector(detail.text).css('.pic img::attr(src)').get()
            if not img_src:
                # Detail page has no '.pic img' element; nothing to download.
                continue
            img_url = urljoin(href, img_src)
            img_response = requests.get(url=img_url, headers=headers, timeout=REQUEST_TIMEOUT)
            img_response.raise_for_status()
        except requests.RequestException as exc:
            print(f'skip {title}: {exc}')
            continue

        # 4. Save the image. Characters that are illegal in file names are
        #    replaced so an unusual title cannot break open().
        safe_title = re.sub(r'[\\/:*?"<>|]', '_', title).strip()
        file_path = os.path.join(SAVE_DIR, safe_title + '.jpg')
        with open(file_path, mode='wb') as file:
            file.write(img_response.content)
        print(title, img_url)





